home *** CD-ROM | disk | FTP | other *** search
#!/bin/sh

# Simple indexing test using HTTrack.
#
# Unless ./index.txt already exists, mirrors TESTSITE with HTTrack into
# ./test and moves the generated index.txt into the current directory.
# Then prompts for keywords in a loop and prints what the index lists
# for each one. An empty keyword (or end of input) ends the session.
#
# The keyword is used as an extended regular expression anchored at the
# start of a line, so "keyword$" selects an exact word.
#
# A "real" script/program would use advanced search, and use dichotomy
# (binary search) to find the word in the index.txt file.
# This script is really basic and NOT optimized, and should not be used
# for professional purposes :)

TESTSITE="http://localhost/"

# Create an index if necessary
if ! test -f "index.txt"; then
  echo "Building the index .."
  rm -rf test
  # -%I requests the search index, -O selects the output directory;
  # the index is then expected at test/index.txt.
  httrack --display "$TESTSITE" -%I -O test
  # Stop here rather than running every later step against a missing file.
  if ! mv test/index.txt ./; then
    echo "Unable to build index.txt from $TESTSITE" >&2
    exit 1
  fi
fi

# Convert crlf to lf
# Look for a real carriage return on the first line; command substitution
# only strips trailing newlines, so the CR survives in $cr.
cr=$(printf '\r')
if head -n 1 index.txt | grep -q "$cr"; then
  echo "Converting index to Unix LF style (not CR/LF) .."
  mv -f index.txt index.txt.old
  tr -d '\r' < index.txt.old > index.txt
fi

keyword=-
while test -n "$keyword"; do
  printf "Enter a keyword: "
  # -r keeps backslashes so regex escapes such as \. reach grep intact.
  # At end of input the variable is left empty, which ends the loop.
  read -r keyword
  test -n "$keyword" || break

  # Matching lines, each prefixed with "<line number>:" by grep -n.
  found=$(grep -niE -e "^$keyword" index.txt)
  if test -z "$found"; then
    echo "not found"
    continue
  fi

  # Some wc implementations pad the count with blanks; strip them so the
  # numeric comparison is reliable.
  match_count=$(printf '%s\n' "$found" | wc -l | tr -d ' ')
  if test "$match_count" -ne 1; then
    # Multiple matches
    printf "Found multiple keywords: "
    printf '%s\n' "$found" | cut -d: -f2- | tr '\n' ' '
    echo ""
    echo 'Use keyword$ to find only one'
    continue
  fi

  # One match
  start=$(printf '%s\n' "$found" | cut -d: -f1)

  # First line at or after the keyword that contains "(", prefixed with
  # its position relative to the keyword line. It is treated as the end
  # of the keyword's entry.
  # NOTE(review): presumably HTTrack closes each entry with a
  # parenthesised line - confirm against the index format.
  terminator=$(tail -n "+$start" index.txt | grep -nE '\(' | head -n 1)

  if printf '%s\n' "$terminator" | grep -q "ignored"; then
    echo "keyword ignored (too many hits)"
    continue
  fi

  length=$(printf '%s\n' "$terminator" | cut -d: -f1)
  echo "Found in:"
  # With no terminator line (e.g. a truncated index) read to end of file
  # instead of handing head an empty line count.
  if test -n "$length"; then
    tail -n "+$start" index.txt | head -n "$length"
  else
    tail -n "+$start" index.txt
  fi |
    # Keep the lines that contain a space ("[0-9]*" may match nothing)
    # and print their second space-separated field.
    grep -E "[0-9]* " | cut -f2 -d' '
done